COVID-19(CORONA VIRUS) : EXPLORATORY DATA ANALYSIS

3D_medical_animation_coronavirus_structure-0kardto2r8.jpg

In [74]:
# Installing Dependencies

!pip install pycountry_convert
!pip install folium
!wget https://raw.githubusercontent.com/tarunk04/COVID-19-CaseStudy-and-Predictions/master/models/model_deaths.h5
!wget https://raw.githubusercontent.com/tarunk04/COVID-19-CaseStudy-and-Predictions/master/models/model_confirmed.h5
!pip install keras
!pip install tensorflow
!pip install --upgrade pip
Requirement already satisfied: pycountry_convert in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (0.7.2)
Requirement already satisfied: pprintpp>=0.3.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (from pycountry_convert) (0.4.0)
Requirement already satisfied: pycountry>=16.11.27.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (from pycountry_convert) (19.8.18)
Requirement already satisfied: pytest>=3.4.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (from pycountry_convert) (3.5.1)
Requirement already satisfied: repoze.lru>=0.7 in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (from pycountry_convert) (0.7)
Requirement already satisfied: wheel>=0.30.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (from pycountry_convert) (0.31.1)
Requirement already satisfied: pytest-cov>=2.5.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (from pycountry_convert) (2.8.1)
Requirement already satisfied: pytest-mock>=1.6.3 in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (from pycountry_convert) (3.0.0)
Requirement already satisfied: py>=1.5.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (from pytest>=3.4.0->pycountry_convert) (1.5.3)
Requirement already satisfied: six>=1.10.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (from pytest>=3.4.0->pycountry_convert) (1.14.0)
Requirement already satisfied: setuptools in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (from pytest>=3.4.0->pycountry_convert) (46.1.3.post20200330)
Requirement already satisfied: attrs>=17.4.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (from pytest>=3.4.0->pycountry_convert) (18.1.0)
Requirement already satisfied: more-itertools>=4.0.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (from pytest>=3.4.0->pycountry_convert) (4.1.0)
Requirement already satisfied: pluggy<0.7,>=0.5 in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (from pytest>=3.4.0->pycountry_convert) (0.6.0)
Requirement already satisfied: coverage>=4.4 in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (from pytest-cov>=2.5.1->pycountry_convert) (5.0.4)
Requirement already satisfied: folium in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (0.10.1)
Requirement already satisfied: branca>=0.3.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (from folium) (0.4.0)
Requirement already satisfied: jinja2>=2.9 in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (from folium) (2.10)
Requirement already satisfied: requests in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (from folium) (2.20.0)
Requirement already satisfied: numpy in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (from folium) (1.18.2)
Requirement already satisfied: six in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (from branca>=0.3.0->folium) (1.14.0)
Requirement already satisfied: MarkupSafe>=0.23 in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (from jinja2>=2.9->folium) (1.0)
Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (from requests->folium) (3.0.4)
Requirement already satisfied: idna<2.8,>=2.5 in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (from requests->folium) (2.6)
Requirement already satisfied: urllib3<1.25,>=1.21.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (from requests->folium) (1.23)
Requirement already satisfied: certifi>=2017.4.17 in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (from requests->folium) (2020.4.5.1)
--2020-04-08 12:26:58--  https://raw.githubusercontent.com/tarunk04/COVID-19-CaseStudy-and-Predictions/master/models/model_deaths.h5
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.248.133
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.248.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 111008 (108K) [application/octet-stream]
Saving to: ‘model_deaths.h5’

model_deaths.h5     100%[===================>] 108.41K  --.-KB/s    in 0.003s  

2020-04-08 12:26:58 (33.9 MB/s) - ‘model_deaths.h5’ saved [111008/111008]

--2020-04-08 12:26:58--  https://raw.githubusercontent.com/tarunk04/COVID-19-CaseStudy-and-Predictions/master/models/model_confirmed.h5
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.248.133
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.248.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 111008 (108K) [application/octet-stream]
Saving to: ‘model_confirmed.h5’

model_confirmed.h5  100%[===================>] 108.41K  --.-KB/s    in 0.003s  

2020-04-08 12:26:58 (33.6 MB/s) - ‘model_confirmed.h5’ saved [111008/111008]

Requirement already satisfied: keras in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (2.3.1)
Requirement already satisfied: pyyaml in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (from keras) (5.3.1)
Requirement already satisfied: scipy>=0.14 in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (from keras) (1.4.1)
Requirement already satisfied: h5py in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (from keras) (2.8.0)
Requirement already satisfied: keras-applications>=1.0.6 in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (from keras) (1.0.8)
Requirement already satisfied: numpy>=1.9.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (from keras) (1.18.2)
Requirement already satisfied: six>=1.9.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (from keras) (1.14.0)
Requirement already satisfied: keras-preprocessing>=1.0.5 in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (from keras) (1.1.0)
Requirement already satisfied: tensorflow in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (2.0.0)
Requirement already satisfied: astor>=0.6.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (from tensorflow) (0.8.1)
Requirement already satisfied: numpy<2.0,>=1.16.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (from tensorflow) (1.18.2)
Requirement already satisfied: absl-py>=0.7.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (from tensorflow) (0.9.0)
Requirement already satisfied: google-pasta>=0.1.6 in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (from tensorflow) (0.2.0)
Requirement already satisfied: grpcio>=1.8.6 in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (from tensorflow) (1.28.1)
Collecting gast==0.2.2
  Downloading gast-0.2.2.tar.gz (10 kB)
Requirement already satisfied: wrapt>=1.11.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (from tensorflow) (1.12.1)
Requirement already satisfied: wheel>=0.26 in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (from tensorflow) (0.31.1)
Requirement already satisfied: termcolor>=1.1.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (from tensorflow) (1.1.0)
Requirement already satisfied: opt-einsum>=2.3.2 in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (from tensorflow) (3.1.0)
Requirement already satisfied: keras-preprocessing>=1.0.5 in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (from tensorflow) (1.1.0)
Requirement already satisfied: tensorflow-estimator<2.1.0,>=2.0.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (from tensorflow) (2.0.0)
Requirement already satisfied: keras-applications>=1.0.8 in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (from tensorflow) (1.0.8)
Requirement already satisfied: six>=1.10.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (from tensorflow) (1.14.0)
Requirement already satisfied: tensorboard<2.1.0,>=2.0.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (from tensorflow) (2.0.0)
Requirement already satisfied: protobuf>=3.6.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (from tensorflow) (3.11.4)
Requirement already satisfied: h5py in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (from keras-applications>=1.0.8->tensorflow) (2.8.0)
Requirement already satisfied: setuptools>=41.0.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (from tensorboard<2.1.0,>=2.0.0->tensorflow) (46.1.3.post20200330)
Requirement already satisfied: werkzeug>=0.11.15 in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (from tensorboard<2.1.0,>=2.0.0->tensorflow) (0.14.1)
Requirement already satisfied: markdown>=2.6.8 in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (from tensorboard<2.1.0,>=2.0.0->tensorflow) (3.1.1)
Building wheels for collected packages: gast
  Building wheel for gast (setup.py) ... done
  Created wheel for gast: filename=gast-0.2.2-py3-none-any.whl size=6587 sha256=095b377f4e520fb8f9e4c9f28a9eb2631d0a61df62f4233b2ee7d910c1c463f2
  Stored in directory: /home/ec2-user/.cache/pip/wheels/19/a7/b9/0740c7a3a7d1d348f04823339274b90de25fbcd217b2ee1fbe
Successfully built gast
Installing collected packages: gast
  Attempting uninstall: gast
    Found existing installation: gast 0.3.3
    Uninstalling gast-0.3.3:
      Successfully uninstalled gast-0.3.3
Successfully installed gast-0.2.2
Requirement already up-to-date: pip in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (20.0.2)
In [75]:
#!pip install tensorflow
!conda install tensorflow --yes
Solving environment: done


==> WARNING: A newer version of conda exists. <==
  current version: 4.5.12
  latest version: 4.8.3

Please update conda by running

    $ conda update -n base -c defaults conda



# All requested packages already installed.

In [76]:
!pip install imgkit
Requirement already satisfied: imgkit in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (1.0.2)
In [77]:
!pip install wkhtmltopdf
Requirement already satisfied: wkhtmltopdf in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (0.2)
In [78]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import ticker
import pycountry_convert as pc
import folium
from datetime import datetime,timedelta, date
from scipy.interpolate import make_interp_spline, BSpline
import plotly.express as px
import json, requests
from pandas.plotting import table

from keras.layers import Input, Dense, Activation, LeakyReLU
from keras import models
from keras.optimizers import RMSprop,Adam
import imgkit


%matplotlib inline
In [79]:
df_confirmed = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv')
df_deaths = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv')
df_recovered = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv')
df_covid19 = pd.read_csv("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/web-data/data/cases_country.csv")
df_table = pd.read_csv("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/web-data/data/cases_time.csv",parse_dates=['Last_Update'])
In [80]:
df_covid19.head()
Out[80]:
Country_Region Last_Update Lat Long_ Confirmed Deaths Recovered Active
0 Australia 2020-04-08 12:15:00 -25.0000 133.0000 6010 50 1080 4880
1 Austria 2020-04-08 12:07:52 47.5162 14.5501 12824 273 4512 8039
2 Canada 2020-04-08 12:14:43 60.0010 -95.0010 17897 381 4047 0
3 China 2020-04-08 09:36:20 30.5928 114.3055 82809 3337 77565 1907
4 Denmark 2020-04-08 12:07:52 56.0000 10.0000 5581 218 1762 3601
In [81]:
df_confirmed.head()
Out[81]:
Province/State Country/Region Lat Long 1/22/20 1/23/20 1/24/20 1/25/20 1/26/20 1/27/20 ... 3/29/20 3/30/20 3/31/20 4/1/20 4/2/20 4/3/20 4/4/20 4/5/20 4/6/20 4/7/20
0 NaN Afghanistan 33.0000 65.0000 0 0 0 0 0 0 ... 120 170 174 237 273 281 299 349 367 423
1 NaN Albania 41.1533 20.1683 0 0 0 0 0 0 ... 212 223 243 259 277 304 333 361 377 383
2 NaN Algeria 28.0339 1.6596 0 0 0 0 0 0 ... 511 584 716 847 986 1171 1251 1320 1423 1468
3 NaN Andorra 42.5063 1.5218 0 0 0 0 0 0 ... 334 370 376 390 428 439 466 501 525 545
4 NaN Angola -11.2027 17.8739 0 0 0 0 0 0 ... 7 7 7 8 8 8 10 14 16 17

5 rows × 81 columns

In [82]:
df_deaths.head()
Out[82]:
Province/State Country/Region Lat Long 1/22/20 1/23/20 1/24/20 1/25/20 1/26/20 1/27/20 ... 3/29/20 3/30/20 3/31/20 4/1/20 4/2/20 4/3/20 4/4/20 4/5/20 4/6/20 4/7/20
0 NaN Afghanistan 33.0000 65.0000 0 0 0 0 0 0 ... 4 4 4 4 6 6 7 7 11 14
1 NaN Albania 41.1533 20.1683 0 0 0 0 0 0 ... 10 11 15 15 16 17 20 20 21 22
2 NaN Algeria 28.0339 1.6596 0 0 0 0 0 0 ... 31 35 44 58 86 105 130 152 173 193
3 NaN Andorra 42.5063 1.5218 0 0 0 0 0 0 ... 6 8 12 14 15 16 17 18 21 22
4 NaN Angola -11.2027 17.8739 0 0 0 0 0 0 ... 2 2 2 2 2 2 2 2 2 2

5 rows × 81 columns

In [83]:
df_recovered.head()
Out[83]:
Province/State Country/Region Lat Long 1/22/20 1/23/20 1/24/20 1/25/20 1/26/20 1/27/20 ... 3/29/20 3/30/20 3/31/20 4/1/20 4/2/20 4/3/20 4/4/20 4/5/20 4/6/20 4/7/20
0 NaN Afghanistan 33.0000 65.0000 0 0 0 0 0 0 ... 2 2 5 5 10 10 10 15 18 18
1 NaN Albania 41.1533 20.1683 0 0 0 0 0 0 ... 33 44 52 67 76 89 99 104 116 131
2 NaN Algeria 28.0339 1.6596 0 0 0 0 0 0 ... 31 37 46 61 61 62 90 90 90 113
3 NaN Andorra 42.5063 1.5218 0 0 0 0 0 0 ... 1 10 10 10 10 16 21 26 31 39
4 NaN Angola -11.2027 17.8739 0 0 0 0 0 0 ... 0 0 1 1 1 1 2 2 2 2

5 rows × 81 columns

In [84]:
df_confirmed = df_confirmed.rename(columns={'Province/State':'state','Country/Region':'country'})
df_deaths = df_deaths.rename(columns={'Province/State':'state','Country/Region':'country'})
df_recovered = df_recovered.rename(columns={'Province/State':'state','Country/Region':'country'})
df_covid19 = df_covid19.rename(columns={'Country_Region':'country'})
df_covid19['Active'] = df_covid19['Confirmed'] - df_covid19['Recovered'] - df_covid19['Deaths']
In [85]:
# Changing the conuntry names as required by pycountry_convert Lib
df_confirmed.loc[df_confirmed['country'] == "US", "country"] = "USA"
df_deaths.loc[df_deaths['country'] == "US", "country"] = "USA"
df_covid19.loc[df_covid19['country'] == "US", "country"] = "USA"
df_table.loc[df_table['Country_Region'] == "US", "Country_Region"] = "USA"
df_recovered.loc[df_recovered['country'] == "US", "country"] = "USA"


df_confirmed.loc[df_confirmed['country'] == 'Korea, South', "country"] = 'South Korea'
df_deaths.loc[df_deaths['country'] == 'Korea, South', "country"] = 'South Korea'
df_covid19.loc[df_covid19['country'] == "Korea, South", "country"] = "South Korea"
df_table.loc[df_table['Country_Region'] == "Korea, South", "Country_Region"] = "South Korea"
df_recovered.loc[df_recovered['country'] == 'Korea, South', "country"] = 'South Korea'

df_confirmed.loc[df_confirmed['country'] == 'Taiwan*', "country"] = 'Taiwan'
df_deaths.loc[df_deaths['country'] == 'Taiwan*', "country"] = 'Taiwan'
df_covid19.loc[df_covid19['country'] == "Taiwan*", "country"] = "Taiwan"
df_table.loc[df_table['Country_Region'] == "Taiwan*", "Country_Region"] = "Taiwan"
df_recovered.loc[df_recovered['country'] == 'Taiwan*', "country"] = 'Taiwan'

df_confirmed.loc[df_confirmed['country'] == 'Congo (Kinshasa)', "country"] = 'Democratic Republic of the Congo'
df_deaths.loc[df_deaths['country'] == 'Congo (Kinshasa)', "country"] = 'Democratic Republic of the Congo'
df_covid19.loc[df_covid19['country'] == "Congo (Kinshasa)", "country"] = "Democratic Republic of the Congo"
df_table.loc[df_table['Country_Region'] == "Congo (Kinshasa)", "Country_Region"] = "Democratic Republic of the Congo"
df_recovered.loc[df_recovered['country'] == 'Congo (Kinshasa)', "country"] = 'Democratic Republic of the Congo'

df_confirmed.loc[df_confirmed['country'] == "Cote d'Ivoire", "country"] = "Côte d'Ivoire"
df_deaths.loc[df_deaths['country'] == "Cote d'Ivoire", "country"] = "Côte d'Ivoire"
df_covid19.loc[df_covid19['country'] == "Cote d'Ivoire", "country"] = "Côte d'Ivoire"
df_table.loc[df_table['Country_Region'] == "Cote d'Ivoire", "Country_Region"] = "Côte d'Ivoire"
df_recovered.loc[df_recovered['country'] == "Cote d'Ivoire", "country"] = "Côte d'Ivoire"

df_confirmed.loc[df_confirmed['country'] == "Reunion", "country"] = "Réunion"
df_deaths.loc[df_deaths['country'] == "Reunion", "country"] = "Réunion"
df_covid19.loc[df_covid19['country'] == "Reunion", "country"] = "Réunion"
df_table.loc[df_table['Country_Region'] == "Reunion", "Country_Region"] = "Réunion"
df_recovered.loc[df_recovered['country'] == "Reunion", "country"] = "Réunion"

df_confirmed.loc[df_confirmed['country'] == 'Congo (Brazzaville)', "country"] = 'Republic of the Congo'
df_deaths.loc[df_deaths['country'] == 'Congo (Brazzaville)', "country"] = 'Republic of the Congo'
df_covid19.loc[df_covid19['country'] == "Congo (Brazzaville)", "country"] = "Republic of the Congo"
df_table.loc[df_table['Country_Region'] == "Congo (Brazzaville)", "Country_Region"] = "Republic of the Congo"
df_recovered.loc[df_recovered['country'] == 'Congo (Brazzaville)', "country"] = 'Republic of the Congo'

df_confirmed.loc[df_confirmed['country'] == 'Bahamas, The', "country"] = 'Bahamas'
df_deaths.loc[df_deaths['country'] == 'Bahamas, The', "country"] = 'Bahamas'
df_covid19.loc[df_covid19['country'] == "Bahamas, The", "country"] = "Bahamas"
df_table.loc[df_table['Country_Region'] == "Bahamas, The", "Country_Region"] = "Bahamas"
df_recovered.loc[df_recovered['country'] == 'Bahamas, The', "country"] = 'Bahamas'

df_confirmed.loc[df_confirmed['country'] == 'Gambia, The', "country"] = 'Gambia'
df_deaths.loc[df_deaths['country'] == 'Gambia, The', "country"] = 'Gambia'
df_covid19.loc[df_covid19['country'] == "Gambia, The", "country"] = "Gambia"
df_table.loc[df_table['Country_Region'] == "Gambia", "Country_Region"] = "Gambia"
df_recovered.loc[df_recovered['country'] == 'Gambia, The', "country"] = 'Gambia'

# getting all countries
countries = np.asarray(df_confirmed["country"])
countries1 = np.asarray(df_covid19["country"])
# Continent_code to Continent_names
continents = {
    'NA': 'North America',
    'SA': 'South America', 
    'AS': 'Asia',
    'OC': 'Australia',
    'AF': 'Africa',
    'EU' : 'Europe',
    'na' : 'Others'
}
In [86]:
df_confirmed = df_confirmed.replace(np.nan, '', regex=True)
df_confirmed = df_confirmed.drop(['state'],axis=1)
df_confirmed.head()
Out[86]:
country Lat Long 1/22/20 1/23/20 1/24/20 1/25/20 1/26/20 1/27/20 1/28/20 ... 3/29/20 3/30/20 3/31/20 4/1/20 4/2/20 4/3/20 4/4/20 4/5/20 4/6/20 4/7/20
0 Afghanistan 33.0000 65.0000 0 0 0 0 0 0 0 ... 120 170 174 237 273 281 299 349 367 423
1 Albania 41.1533 20.1683 0 0 0 0 0 0 0 ... 212 223 243 259 277 304 333 361 377 383
2 Algeria 28.0339 1.6596 0 0 0 0 0 0 0 ... 511 584 716 847 986 1171 1251 1320 1423 1468
3 Andorra 42.5063 1.5218 0 0 0 0 0 0 0 ... 334 370 376 390 428 439 466 501 525 545
4 Angola -11.2027 17.8739 0 0 0 0 0 0 0 ... 7 7 7 8 8 8 10 14 16 17

5 rows × 80 columns

In [87]:
df_deaths = df_deaths.replace(np.nan, '', regex=True)
df_deaths = df_deaths.drop(['state'],axis=1)
df_deaths.head()
Out[87]:
country Lat Long 1/22/20 1/23/20 1/24/20 1/25/20 1/26/20 1/27/20 1/28/20 ... 3/29/20 3/30/20 3/31/20 4/1/20 4/2/20 4/3/20 4/4/20 4/5/20 4/6/20 4/7/20
0 Afghanistan 33.0000 65.0000 0 0 0 0 0 0 0 ... 4 4 4 4 6 6 7 7 11 14
1 Albania 41.1533 20.1683 0 0 0 0 0 0 0 ... 10 11 15 15 16 17 20 20 21 22
2 Algeria 28.0339 1.6596 0 0 0 0 0 0 0 ... 31 35 44 58 86 105 130 152 173 193
3 Andorra 42.5063 1.5218 0 0 0 0 0 0 0 ... 6 8 12 14 15 16 17 18 21 22
4 Angola -11.2027 17.8739 0 0 0 0 0 0 0 ... 2 2 2 2 2 2 2 2 2 2

5 rows × 80 columns

In [88]:
df_recovered = df_recovered.replace(np.nan, '', regex=True)
df_recovered = df_recovered.drop(['state'],axis=1)
df_recovered.head()
Out[88]:
country Lat Long 1/22/20 1/23/20 1/24/20 1/25/20 1/26/20 1/27/20 1/28/20 ... 3/29/20 3/30/20 3/31/20 4/1/20 4/2/20 4/3/20 4/4/20 4/5/20 4/6/20 4/7/20
0 Afghanistan 33.0000 65.0000 0 0 0 0 0 0 0 ... 2 2 5 5 10 10 10 15 18 18
1 Albania 41.1533 20.1683 0 0 0 0 0 0 0 ... 33 44 52 67 76 89 99 104 116 131
2 Algeria 28.0339 1.6596 0 0 0 0 0 0 0 ... 31 37 46 61 61 62 90 90 90 113
3 Andorra 42.5063 1.5218 0 0 0 0 0 0 0 ... 1 10 10 10 10 16 21 26 31 39
4 Angola -11.2027 17.8739 0 0 0 0 0 0 0 ... 0 0 1 1 1 1 2 2 2 2

5 rows × 80 columns

In [89]:
df_table = df_table.replace(np.nan, '', regex=True)
df_table.head()
Out[89]:
Country_Region Last_Update Confirmed Deaths Recovered Active Delta_Confirmed Delta_Recovered
0 Afghanistan 2020-01-22 0 0 0
1 Afghanistan 2020-01-23 0 0 0
2 Afghanistan 2020-01-24 0 0 0
3 Afghanistan 2020-01-25 0 0 0
4 Afghanistan 2020-01-26 0 0 0
In [90]:
df_countries_cases = df_covid19.copy().drop(['Lat','Long_','Last_Update'],axis=1)
In [91]:
df_countries_cases.head()
Out[91]:
country Confirmed Deaths Recovered Active
0 Australia 6010 50 1080 4880
1 Austria 12824 273 4512 8039
2 Canada 17897 381 4047 13469
3 China 82809 3337 77565 1907
4 Denmark 5581 218 1762 3601
In [92]:
df_countries_cases.index = df_countries_cases['country']
df_countries_cases = df_countries_cases.drop(['country'],axis=1)
df_countries_cases.head()
Out[92]:
Confirmed Deaths Recovered Active
country
Australia 6010 50 1080 4880
Austria 12824 273 4512 8039
Canada 17897 381 4047 13469
China 82809 3337 77565 1907
Denmark 5581 218 1762 3601
In [93]:
df_countries_cases = df_countries_cases.groupby(['country']).sum()
In [94]:
df_countries_cases.head()
Out[94]:
Confirmed Deaths Recovered Active
country
Afghanistan 423 14 18 391
Albania 400 22 154 224
Algeria 1468 193 113 1162
Andorra 545 22 39 484
Angola 17 2 2 13
In [95]:
df_countries_cases['Mortality_Rate(per100)'] = np.round(100 * df_countries_cases['Deaths']/df_countries_cases['Confirmed'],2)
In [96]:
df_countries_cases.head()
Out[96]:
Confirmed Deaths Recovered Active Mortality_Rate(per100)
country
Afghanistan 423 14 18 391 3.31
Albania 400 22 154 224 5.50
Algeria 1468 193 113 1162 13.15
Andorra 545 22 39 484 4.04
Angola 17 2 2 13 11.76
In [97]:
df_countries_cases.sort_values('Confirmed',ascending=False,inplace=True)
In [98]:
df_countries_cases.head()
Out[98]:
Confirmed Deaths Recovered Active Mortality_Rate(per100)
country
USA 399929 12911 22539 364479 3.23
Spain 146690 14555 48021 84114 9.92
Italy 135586 17127 24392 94067 12.63
France 110070 10343 19523 80204 9.40
Germany 107663 2016 36081 69566 1.87
In [99]:
df_total_world = pd.DataFrame(df_countries_cases.sum()).transpose().style.background_gradient(cmap='prism',axis=1)
df_total_world
Out[99]:
Confirmed Deaths Recovered Active Mortality_Rate(per100)
0 1.44624e+06 83424 308146 1.05467e+06 742.95
In [100]:
df_total_world.to_excel('World_Total_COVID19_data.xlsx',engine='openpyxl')
In [101]:
df_countries = df_countries_cases.style.background_gradient(cmap='Blues',subset=["Confirmed"])\
                        .background_gradient(cmap='Reds',subset=["Deaths"])\
                        .background_gradient(cmap='Greens',subset=["Recovered"])\
                        .background_gradient(cmap='Purples',subset=["Active"])\
                        .background_gradient(cmap='YlOrBr',subset=["Mortality_Rate(per100)"])
In [102]:
df_countries
Out[102]:
Confirmed Deaths Recovered Active Mortality_Rate(per100)
country
USA 399929 12911 22539 364479 3.23
Spain 146690 14555 48021 84114 9.92
Italy 135586 17127 24392 94067 12.63
France 110070 10343 19523 80204 9.4
Germany 107663 2016 36081 69566 1.87
China 82809 3337 77565 1907 4.03
Iran 64586 3993 27039 33554 6.18
United Kingdom 55957 6171 332 49454 11.03
Turkey 34109 725 1582 31802 2.13
Belgium 23403 2240 4681 16482 9.57
Switzerland 22789 858 8704 13227 3.76
Netherlands 20678 2255 272 18151 10.91
Canada 17897 381 4047 13469 2.13
Brazil 14072 691 127 13254 4.91
Austria 12824 273 4512 8039 2.13
Portugal 12442 345 184 11913 2.77
South Korea 10384 200 6776 3408 1.93
Israel 9404 72 801 8531 0.77
Russia 8672 63 580 8029 0.73
Sweden 8419 687 205 7527 8.16
Norway 6086 93 32 5961 1.53
Australia 6010 50 1080 4880 0.83
Ireland 5709 210 25 5474 3.68
Denmark 5581 218 1762 3601 3.91
India 5480 164 468 4848 2.99
Chile 5116 43 898 4175 0.84
Czechia 5033 91 181 4761 1.81
Poland 5000 136 222 4642 2.72
Romania 4761 210 528 4023 4.41
Japan 4257 93 622 3542 2.18
Malaysia 4119 65 1487 2567 1.58
Pakistan 4072 58 467 3547 1.42
Ecuador 3995 220 140 3635 5.51
Philippines 3870 182 96 3592 4.7
Luxembourg 2970 44 500 2426 1.48
Indonesia 2956 240 222 2494 8.12
Peru 2954 107 1301 1546 3.62
Saudi Arabia 2795 41 615 2139 1.47
Mexico 2785 141 633 2011 5.06
Finland 2487 40 300 2147 1.61
Serbia 2447 61 0 2386 2.49
Thailand 2369 30 888 1451 1.27
United Arab Emirates 2359 12 186 2161 0.51
Panama 2249 59 16 2174 2.62
Qatar 2210 6 178 2026 0.27
Dominican Republic 1956 98 36 1822 5.01
Greece 1832 81 269 1482 4.42
Colombia 1780 50 100 1630 2.81
South Africa 1749 13 95 1641 0.74
Argentina 1715 60 338 1317 3.5
Ukraine 1668 52 35 1581 3.12
Iceland 1586 6 559 1021 0.38
Singapore 1481 6 377 1098 0.41
Algeria 1468 193 113 1162 13.15
Egypt 1450 94 276 1080 6.48
Croatia 1282 18 167 1097 1.4
Morocco 1242 91 97 1054 7.33
New Zealand 1210 1 282 927 0.08
Estonia 1185 24 72 1089 2.03
Iraq 1122 65 373 684 5.79
Slovenia 1091 40 120 931 3.67
Belarus 1066 13 77 976 1.22
Moldova 1056 24 40 992 2.27
Lithuania 912 15 8 889 1.64
Hungary 895 58 94 743 6.48
Armenia 881 9 114 758 1.02
Kuwait 855 1 111 743 0.12
Bahrain 811 5 465 341 0.62
Bosnia and Herzegovina 777 33 77 667 4.25
Azerbaijan 717 8 44 665 1.12
Diamond Princess 712 11 619 82 1.54
Kazakhstan 709 7 54 648 0.99
Cameroon 685 9 60 616 1.31
Tunisia 623 23 25 575 3.69
North Macedonia 599 26 30 543 4.34
Bulgaria 581 23 42 516 3.96
Slovakia 581 2 13 566 0.34
Latvia 577 2 16 559 0.35
Lebanon 575 19 62 494 3.3
Andorra 545 22 39 484 4.04
Uzbekistan 534 3 30 501 0.56
Cyprus 494 9 47 438 1.82
Costa Rica 483 2 24 457 0.41
Uruguay 424 7 150 267 1.65
Afghanistan 423 14 18 391 3.31
Oman 419 2 72 345 0.48
Albania 400 22 154 224 5.5
Cuba 396 11 27 358 2.78
Burkina Faso 384 19 127 238 4.95
Taiwan 379 5 61 313 1.32
Jordan 353 6 138 209 1.7
Côte d'Ivoire 349 3 41 305 0.86
Honduras 312 22 6 284 7.05
Malta 299 0 5 294 0
Ghana 287 5 31 251 1.74
San Marino 279 34 40 205 12.19
Niger 278 11 26 241 3.96
Kyrgyzstan 270 4 33 233 1.48
Mauritius 268 7 8 253 2.61
West Bank and Gaza 263 1 44 218 0.38
Nigeria 254 6 44 204 2.36
Vietnam 251 0 126 125 0
Montenegro 248 2 4 242 0.81
Senegal 237 2 105 130 0.84
Bangladesh 218 20 33 165 9.17
Bolivia 210 15 2 193 7.14
Georgia 208 3 48 157 1.44
Sri Lanka 188 6 42 140 3.19
Kosovo 184 5 30 149 2.72
Democratic Republic of the Congo 180 18 9 153 10
Kenya 172 6 7 159 3.49
Venezuela 166 7 65 94 4.22
Guinea 144 0 5 139 0
Brunei 135 1 91 43 0.74
Djibouti 121 0 18 103 0
Paraguay 119 5 15 99 4.2
Cambodia 117 0 63 54 0
Trinidad and Tobago 107 8 1 98 7.48
Rwanda 105 0 7 98 0
Madagascar 93 0 11 82 0
El Salvador 93 5 9 79 5.38
Guatemala 80 3 17 60 3.75
Monaco 79 1 4 74 1.27
Liechtenstein 78 1 55 22 1.28
Togo 65 3 23 39 4.62
Barbados 63 3 6 54 4.76
Jamaica 63 3 9 51 4.76
Mali 56 5 12 39 8.93
Ethiopia 55 2 4 49 3.64
Uganda 52 0 0 52 0
Republic of the Congo 45 5 2 38 11.11
Zambia 39 1 7 31 2.56
Bahamas 36 6 5 25 16.67
Guinea-Bissau 33 0 0 33 0
Guyana 33 5 8 20 15.15
Eritrea 31 0 0 31 0
Gabon 30 1 1 28 3.33
Benin 26 1 5 20 3.85
Haiti 25 1 0 24 4
Tanzania 24 1 5 18 4.17
Burma 22 3 0 19 13.64
Libya 21 1 2 18 4.76
Antigua and Barbuda 19 2 0 17 10.53
Maldives 19 0 13 6 0
Syria 19 2 3 14 10.53
Angola 17 2 2 13 11.76
Namibia 16 0 3 13 0
Mongolia 16 0 4 12 0
Equatorial Guinea 16 0 3 13 0
Laos 15 0 0 15 0
Dominica 15 0 1 14 0
Fiji 15 0 0 15 0
Saint Lucia 14 0 1 13 0
Sudan 14 2 2 10 14.29
Liberia 14 3 3 8 21.43
Grenada 12 0 0 12 0
Zimbabwe 11 2 0 9 18.18
Saint Kitts and Nevis 11 0 0 11 0
Seychelles 11 0 0 11 0
Mozambique 10 0 1 9 0
Chad 10 0 2 8 0
Eswatini 10 0 4 6 0
Suriname 10 1 3 6 10
MS Zaandam 9 2 0 7 22.22
Nepal 9 0 1 8 0
Malawi 8 1 0 7 12.5
Central African Republic 8 0 0 8 0
Somalia 8 0 1 7 0
Saint Vincent and the Grenadines 8 0 1 7 0
Sierra Leone 7 0 0 7 0
Belize 7 1 0 6 14.29
Holy See 7 0 0 7 0
Cabo Verde 7 1 1 5 14.29
Botswana 6 1 0 5 16.67
Mauritania 6 1 2 3 16.67
Nicaragua 6 1 0 5 16.67
Bhutan 5 0 2 3 0
Gambia 4 1 2 1 25
Western Sahara 4 0 0 4 0
Sao Tome and Principe 4 0 0 4 0
Burundi 3 0 0 3 0
South Sudan 2 0 0 2 0
Papua New Guinea 2 0 0 2 0
Timor-Leste 1 0 0 1 0
In [103]:
df_countries.to_excel('World_COVID19_data.xlsx',engine='openpyxl')
In [104]:
f = plt.figure(figsize=(18,9))
f.add_subplot(111)

plt.axes(axisbelow=True)
plt.barh(df_countries_cases.sort_values('Confirmed')["Confirmed"].index[-10:],df_countries_cases.sort_values('Confirmed')["Confirmed"].values[-10:],color="mediumblue")
plt.tick_params(size=5,labelsize = 13)
plt.xlabel("Confirmed Cases",fontsize=18)
plt.title("Top 10 Countries (Confirmed Cases)",fontsize=20)
plt.grid(alpha=0.3)
plt.savefig('Top 10 Countries (Confirmed Cases).png')
In [105]:
f = plt.figure(figsize=(18,9))
f.add_subplot(111)

plt.axes(axisbelow=True)
plt.barh(df_countries_cases.sort_values('Deaths')["Deaths"].index[-10:],df_countries_cases.sort_values('Deaths')["Deaths"].values[-10:],color="red")
plt.tick_params(size=5,labelsize = 13)
plt.xlabel("Deaths Cases",fontsize=18)
plt.title("Top 10 Countries (Deaths Cases)",fontsize=20)
plt.grid(alpha=0.3)
plt.savefig('Top 10 Countries (Deaths Cases).png')
In [106]:
f = plt.figure(figsize=(18,9))
f.add_subplot(111)

plt.axes(axisbelow=True)
plt.barh(df_countries_cases.sort_values('Recovered')["Recovered"].index[-10:],df_countries_cases.sort_values('Recovered')["Recovered"].values[-10:],color="lawngreen")
plt.tick_params(size=5,labelsize = 13)
plt.xlabel("Recovered Cases",fontsize=18)
plt.title("Top 10 Countries (Recovered Cases)",fontsize=20)
plt.grid(alpha=0.3)
plt.savefig('Top 10 Countries (Recovered Cases).png')
In [107]:
df_countries_cases.corr().style.background_gradient(cmap='OrRd')
Out[107]:
Confirmed Deaths Recovered Active Mortality_Rate(per100)
Confirmed 1 0.826433 0.617086 0.974346 0.0781513
Deaths 0.826433 1 0.628942 0.750647 0.185272
Recovered 0.617086 0.628942 1 0.426819 0.0723316
Active 0.974346 0.750647 0.426819 1 0.0610422
Mortality_Rate(per100) 0.0781513 0.185272 0.0723316 0.0610422 1
In [108]:
world_map = folium.Map(location=[10,0], tiles="cartodbpositron", zoom_start=2,max_zoom=6,min_zoom=2)
for i in range(0,len(df_confirmed)):
    folium.Circle(
        location=[df_confirmed.iloc[i]['Lat'], df_confirmed.iloc[i]['Long']],
        tooltip = "<h5 style='text-align:center;font-weight: bold'>"+df_confirmed.iloc[i]['country']+"</h5>"+
                    "<div style='text-align:center;'>"+"</div>"+
                    "<hr style='margin:10px;'>"+
                    "<ul style='color: #444;list-style-type:circle;align-item:left;padding-left:20px;padding-right:20px'>"+
        "<li>Confirmed: "+str(df_confirmed.iloc[i,-1])+"</li>"+
        "<li>Deaths:   "+str(df_deaths.iloc[i,-1])+"</li>"+
        "<li>Mortality Rate:   "+str(np.round(df_deaths.iloc[i,-1]/(df_confirmed.iloc[i,-1]+1.00001)*100,2))+"</li>"+
        "</ul>"
        ,
        radius=(int((np.log(df_confirmed.iloc[i,-1]+1.00001)))+0.2)*50000,
        color='#ff6600',
        fill_color='#ff8533',
        fill=True).add_to(world_map)

world_map
Out[108]:
In [109]:
df_countries = df_confirmed.groupby(['country']).sum()
In [110]:
df_countries = df_countries.sort_values(df_countries.columns[-1],ascending=False)
In [111]:
countries = df_countries[df_countries[df_countries.columns[-1]] >= 20000].index
In [112]:
countries
Out[112]:
Index(['USA', 'Spain', 'Italy', 'France', 'Germany', 'China', 'Iran',
       'United Kingdom', 'Turkey', 'Switzerland', 'Belgium'],
      dtype='object', name='country')
In [113]:
def plot_params(ax,axis_label= None, plt_title = None,label_size=15, axis_fsize = 15, title_fsize = 20, scale = 'linear' ):
    # Tick-Parameters
    ax.xaxis.set_minor_locator(ticker.AutoMinorLocator())
    ax.yaxis.set_minor_locator(ticker.AutoMinorLocator())
    ax.tick_params(which='both', width=1,labelsize=label_size)
    ax.tick_params(which='major', length=6)
    ax.tick_params(which='minor', length=3, color='0.8')
    
    # Grid
    plt.grid(lw = 1, ls = '-', c = "0.7", which = 'major')
    plt.grid(lw = 1, ls = '-', c = "0.9", which = 'minor')

    # Plot Title
    plt.title( plt_title,{'fontsize':title_fsize})
    
    # Yaxis sacle
    plt.yscale(scale)
    plt.minorticks_on()
    # Plot Axes Labels
    xl = plt.xlabel(axis_label[0],fontsize = axis_fsize)
    yl = plt.ylabel(axis_label[1],fontsize = axis_fsize)
    
def visualize_covid_cases(confirmed, deaths, continent=None , country = None , state = None, period = None, figure = None, scale = "linear"):
    x = 0
    if figure == None:
        f = plt.figure(figsize=(10,10))
        # Sub plot
        ax = f.add_subplot(111)
    else :
        f = figure[0]
        # Sub plot
        ax = f.add_subplot(figure[1],figure[2],figure[3])
    
    plt.tight_layout(pad=10, w_pad=5, h_pad=5)
    
    stats = [confirmed, deaths]
    label = ["Confirmed", "Deaths"]
    
    if continent != None:
        params = ["continent",continent]
    elif country != None:
        params = ["country",country]
    else: 
        params = ["All", "All"]
    color = ["darkcyan","crimson"]
    marker_style = dict(linewidth=3, linestyle='-', marker='o',markersize=4, markerfacecolor='#ffffff')
    for i,stat in enumerate(stats):
        if params[1] == "All" :
            cases = np.sum(np.asarray(stat.iloc[:,5:]),axis = 0)[x:]
        else :
            cases = np.sum(np.asarray(stat[stat[params[0]] == params[1]].iloc[:,5:]),axis = 0)[x:]
        date = np.arange(1,cases.shape[0]+1)[x:]
        plt.plot(date,cases,label = label[i]+" (Total : "+str(cases[-1])+")",color=color[i],**marker_style)

    if params[1] == "All" :
        Total_confirmed = np.sum(np.asarray(stats[0].iloc[:,5:]),axis = 0)[x:]
        Total_deaths = np.sum(np.asarray(stats[1].iloc[:,5:]),axis = 0)[x:]
    else :
        Total_confirmed =  np.sum(np.asarray(stats[0][stat[params[0]] == params[1]].iloc[:,5:]),axis = 0)[x:]
        Total_deaths = np.sum(np.asarray(stats[1][stat[params[0]] == params[1]].iloc[:,5:]),axis = 0)[x:]
        
    text = "From "+stats[0].columns[5]+" to "+stats[0].columns[-1]+"\n"
    text += "Mortality rate : "+ str(int(Total_deaths[-1]/(Total_confirmed[-1])*10000)/100)+"\n"
    text += "Last 5 Days:\n"
    text += "Confirmed : " + str(Total_confirmed[-1] - Total_confirmed[-6])+"\n"
    text += "Deaths : " + str(Total_deaths[-1] - Total_deaths[-6])+"\n"
    text += "Last 24 Hours:\n"
    text += "Confirmed : " + str(Total_confirmed[-1] - Total_confirmed[-2])+"\n"
    text += "Deaths : " + str(Total_deaths[-1] - Total_deaths[-2])+"\n"
    
    plt.text(0.02, 0.78, text, fontsize=15, horizontalalignment='left', verticalalignment='top', transform=ax.transAxes,bbox=dict(facecolor='white', alpha=0.4))
    
    # Plot Axes Labels
    axis_label = ["Days ("+df_confirmed.columns[5]+" - "+df_confirmed.columns[-1]+")","No of Cases"]
    
    # Plot Parameters
    plot_params(ax,axis_label,scale = scale)
    
    # Plot Title
    if params[1] == "All" :
        plt.title("COVID-19 Cases World",{'fontsize':25})
    else:   
        plt.title("COVID-19 Cases for "+params[1] ,{'fontsize':25})
        
    # Legend Location
    l = plt.legend(loc= "best",fontsize = 15)
    
    if figure == None:
        plt.show()
        
def get_total_cases(cases, country = "All"):
    if(country == "All") :
        return np.sum(np.asarray(cases.iloc[:,5:]),axis = 0)[-1]
    else :
        return np.sum(np.asarray(cases[cases["country"] == country].iloc[:,5:]),axis = 0)[-1]
    
def get_mortality_rate(confirmed,deaths, continent = None, country = None):
    if continent != None:
        params = ["continent",continent]
    elif country != None:
        params = ["country",country]
    else :
        params = ["All", "All"]
    
    if params[1] == "All" :
        Total_confirmed = np.sum(np.asarray(confirmed.iloc[:,5:]),axis = 0)
        Total_deaths = np.sum(np.asarray(deaths.iloc[:,5:]),axis = 0)
        mortality_rate = np.round((Total_deaths/Total_confirmed)*100,2)
    else :
        Total_confirmed =  np.sum(np.asarray(confirmed[confirmed[params[0]] == params[1]].iloc[:,5:]),axis = 0)
        Total_deaths = np.sum(np.asarray(deaths[deaths[params[0]] == params[1]].iloc[:,5:]),axis = 0)
        mortality_rate = np.round((Total_deaths/Total_confirmed)*100,2)
    
    return np.nan_to_num(mortality_rate)
def dd(date1,date2):
    return (datetime.strptime(date1,'%m/%d/%y') - datetime.strptime(date2,'%m/%d/%y')).days
In [114]:
cols = 1
rows = 1
f = plt.figure(figsize=(10,10*rows))
visualize_covid_cases(df_confirmed, df_deaths,country = "All",figure = [f,rows,cols, 1])

plt.savefig('COIVD-19-World.png')
plt.show()
In [115]:
df_countries = df_countries.groupby(['country']).sum()
df_countries = df_countries.sort_values(df_countries.columns[-1],ascending=False)
countries = df_countries[df_countries[df_countries.columns[-1]]>=20000].index
cols = 2
rows = int(np.ceil(countries.shape[0]/cols))
f = plt.figure(figsize=(20,8*rows))
for i,country in enumerate(countries):
    visualize_covid_cases(df_confirmed,df_deaths,country=country,figure=[f,rows,cols,i+1])
    
plt.savefig("Top 10 countries latest trends.png")
plt.show()
In [116]:
case_nums_country = df_confirmed.groupby("country").sum().drop(['Lat','Long'],axis =1).apply(lambda x: x[x > 0].count(), axis =0)
d = [datetime.strptime(date,'%m/%d/%y').strftime("%d %b") for date in case_nums_country.index]

f = plt.figure(figsize=(15,8))
f.add_subplot(111)
marker_style = dict(c="crimson",linewidth=6, linestyle='-', marker='o',markersize=8, markerfacecolor='#ffffff')
plt.plot(d, case_nums_country,**marker_style)
plt.tick_params(labelsize = 14)
plt.xticks(list(np.arange(0,len(d),int(len(d)/5))),d[:-1:int(len(d)/5)]+[d[-1]])

#labels
plt.xlabel("Dates",fontsize=18)
plt.ylabel("Number of Countries/Regions",fontsize=18)
plt.grid(alpha = 0.3)

plt.savefig('spread.png')
plt.show()
plt.close()
In [117]:
model = models.load_model('model_confirmed.h5')
In [118]:
model.summary()
Model: "model_16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
input_19 (InputLayer)        (None, 1)                 0         
_________________________________________________________________
Dense_l1 (Dense)             (None, 80)                160       
_________________________________________________________________
LRelu_l1 (LeakyReLU)         (None, 80)                0         
_________________________________________________________________
Dense_l2 (Dense)             (None, 80)                6480      
_________________________________________________________________
LRelu_l2 (LeakyReLU)         (None, 80)                0         
_________________________________________________________________
Dense_l3 (Dense)             (None, 1)                 81        
_________________________________________________________________
Output (LeakyReLU)           (None, 1)                 0         
=================================================================
Total params: 6,721
Trainable params: 6,721
Non-trainable params: 0
_________________________________________________________________
In [119]:
data_y = np.log10(np.asarray(df_confirmed.sum()[5:]).astype("float32"))
data_x = np.arange(1,len(data_y)+1)
In [120]:
lakh = 100000
prediction_days = 10
data = np.power(10,model.predict(np.arange(1,len(data_y)+prediction_days+1)))
temp_data = df_confirmed.iloc[:,5:].sum(axis =0)
f = plt.figure(figsize=(15,10))
ax = f.add_subplot(111)

date = np.arange(0,len(temp_data))

marker_style = dict(linewidth=3, linestyle='-', marker='o',markersize=7, markerfacecolor='#ffffff')
plt.plot(date,temp_data/lakh,"-.",color="darkcyan",**marker_style, label="Actual Curve")

date = np.arange(0,len(data))
plt.plot(date,data/lakh,"-.",color="orangered",label="Predicted Curve")

nextdays = [(datetime.strptime(d[-1],'%d %b')+timedelta(days=i)).strftime("%d %b") for i in range(1,prediction_days+1)]
total = d + nextdays

text = "Prediction for next "+str(prediction_days) +" days:\n"
for i in range(prediction_days):
    text += nextdays[i]+" : "+str(np.round(data[-1*(prediction_days-i)],-3)[0]/lakh)+" L\n"

plt.text(0.02, 0.78, text, fontsize=17, horizontalalignment='left', verticalalignment='top', transform=ax.transAxes,bbox=dict(facecolor='white', alpha=0.4))

# X-axis
plt.xticks(list(np.arange(0,len(total),int(len(total)/5))),d[:-1:int(len(total)/5)]+[total[-1]])

# Tick-Parameters
ax.xaxis.set_minor_locator(ticker.AutoMinorLocator())
ax.yaxis.set_minor_locator(ticker.AutoMinorLocator())
ax.tick_params(which='both', width=1,labelsize=14)
ax.tick_params(which='major', length=6)
ax.tick_params(which='minor', length=3, color='0.8')

# Grid
plt.grid(lw = 1, ls = '-', c = "0.7", which = 'major')
plt.grid(lw = 1, ls = '-', c = "0.9", which = 'minor')

# Plot Title
plt.title("COVID-19 Next 10 day Prediction Curve-Global Confirmed Cases",{'fontsize':22})

# Axis Lable
plt.xlabel("Date",fontsize =18)
plt.ylabel("Number of Confirmed Cases (Lakh)",fontsize =18)

plt.yscale("log")
plt.legend(fontsize =18)
plt.tick_params(labelsize = 13) 
plt.savefig("Prediction Curve-Confirmed.png")
plt.show()
In [121]:
# Data
temp_data = np.nan_to_num(df_confirmed.sum()[5:].diff())


# Plot
f = plt.figure(figsize=(15,10))
ax = f.add_subplot(111)


date = np.arange(0,len(temp_data))

marker_style = dict(linewidth=2, linestyle='-', marker='o',markersize=5)
plt.plot(date,temp_data/1000,"-.",color="red",**marker_style)


nextdays = [(datetime.strptime(d[-1],'%d %b')+timedelta(days=i)).strftime("%d %b") for i in range(1,prediction_days+1)]
total =d+nextdays

# X-axis
plt.xticks(list(np.arange(0,len(total),int(len(total)/5))),total[:-1:int(len(total)/5)]+[total[-1]])

# Tick-Parameters
ax.xaxis.set_minor_locator(ticker.AutoMinorLocator())
ax.yaxis.set_minor_locator(ticker.AutoMinorLocator())
ax.tick_params(which='both', width=1,labelsize=12)
ax.tick_params(which='major', length=6)
ax.tick_params(which='minor', length=3, color='0.8')

# Grid
plt.grid(lw = 1, ls = '-', c = "0.85", which = 'major')
plt.grid(lw = 1, ls = '-', c = "0.95", which = 'minor')

# Plot Title
plt.title("COVID-19 Global Daily New Confirmed Cases",{'fontsize':22})

# Axis Lable
plt.xlabel("Date",fontsize =18)
plt.ylabel("Number of Daily Confirmed Cases (Thousand)",fontsize =18)

# plt.yscale("log")
plt.tick_params(labelsize = 13) 
plt.savefig("daily confirmed cases global.png")
plt.show()
In [122]:
# Data
temp_data = np.nan_to_num(df_deaths.sum()[5:].diff())


# Plot
f = plt.figure(figsize=(15,10))
ax = f.add_subplot(111)


date = np.arange(0,len(temp_data))

marker_style = dict(linewidth=2, linestyle='-', marker='o',markersize=5)
plt.plot(date,temp_data/1000,"-.",color="red",**marker_style)


nextdays = [(datetime.strptime(d[-1],'%d %b')+timedelta(days=i)).strftime("%d %b") for i in range(1,prediction_days+1)]
total =d+nextdays

# X-axis
plt.xticks(list(np.arange(0,len(total),int(len(total)/5))),total[:-1:int(len(total)/5)]+[total[-1]])

# Tick-Parameters
ax.xaxis.set_minor_locator(ticker.AutoMinorLocator())
ax.yaxis.set_minor_locator(ticker.AutoMinorLocator())
ax.tick_params(which='both', width=1,labelsize=14)
ax.tick_params(which='major', length=6)
ax.tick_params(which='minor', length=3, color='0.8')

# Grid
plt.grid(lw = 1, ls = '-', c = "0.85", which = 'major')
plt.grid(lw = 1, ls = '-', c = "0.95", which = 'minor')

# Plot Title
plt.title("COVID-19 Global Daily Deaths Reported",{'fontsize':22})

# Axis Lable
plt.xlabel("Date",fontsize =18)
plt.ylabel("Number of Daily Deaths Reported (Thousand)",fontsize =18)

# plt.yscale("log")
plt.savefig("daily deaths cases Global.png")
plt.show()
In [123]:
thoudand = 1000
temp = df_confirmed.groupby('country').sum().diff(axis=1).sort_values(df_deaths.columns[-1],ascending =False).head(10).replace(np.nan,0)
threshold = 0
f = plt.figure(figsize=(20,12))
ax = f.add_subplot(111)
for i,country in enumerate(temp.index):
    t = temp.loc[temp.index== country].values[0]
    t = t[t>=threshold]
    
    date = np.arange(0,len(t[:]))
    plt.plot(date,t/thoudand,'-o',label = country,linewidth =2, markevery=[-1])


nextdays = [(datetime.strptime(d[-1],'%d %b')+timedelta(days=i)).strftime("%d %b") for i in range(1,prediction_days+1)]
total =d+nextdays

# X-axis
plt.xticks(list(np.arange(0,len(total),int(len(total)/5))),total[:-1:int(len(total)/5)]+[total[-1]])

# Tick-Parameters
ax.xaxis.set_minor_locator(ticker.AutoMinorLocator())
ax.yaxis.set_minor_locator(ticker.AutoMinorLocator())
ax.tick_params(which='both', width=1,labelsize=14)
ax.tick_params(which='major', length=6)
ax.tick_params(which='minor', length=3, color='0.8')

# Grid
plt.grid(lw = 1, ls = '-', c = "0.85", which = 'major')
plt.grid(lw = 1, ls = '-', c = "0.95", which = 'minor')

# Plot Title
plt.title("COVID-19 Daily Confirmed Cases in Different Countries",{'fontsize':24})

# Axis Lable
plt.xlabel("Date",fontsize =18)
plt.ylabel("Number of Daily Confirmed Cases (Thousand)",fontsize =18)

# plt.yscale("log")
plt.legend(fontsize=18) 
plt.savefig("daily confirmed cases countrywise.png")
plt.show()
In [124]:
thoudand = 1000
temp = df_deaths.groupby('country').sum().diff(axis=1).sort_values(df_deaths.columns[-1],ascending =False).head(10).replace(np.nan,0)
threshold = 0
f = plt.figure(figsize=(20,12))
ax = f.add_subplot(111)
for i,country in enumerate(temp.index):
    t = temp.loc[temp.index== country].values[0]
    t = t[t>=threshold]
    
    date = np.arange(0,len(t[:]))
    plt.plot(date,t/thoudand,'-o',label = country,linewidth =2, markevery=[-1])


nextdays = [(datetime.strptime(d[-1],'%d %b')+timedelta(days=i)).strftime("%d %b") for i in range(1,prediction_days+1)]
total =d+nextdays

# X-axis
plt.xticks(list(np.arange(0,len(total),int(len(total)/5))),total[:-1:int(len(total)/5)]+[total[-1]])

# Tick-Parameters
ax.xaxis.set_minor_locator(ticker.AutoMinorLocator())
ax.yaxis.set_minor_locator(ticker.AutoMinorLocator())
ax.tick_params(which='both', width=1,labelsize=15)
ax.tick_params(which='major', length=6)
ax.tick_params(which='minor', length=3, color='0.8')

# Grid
plt.grid(lw = 1, ls = '-', c = "0.85", which = 'major')
plt.grid(lw = 1, ls = '-', c = "0.95", which = 'minor')

# Plot Title
plt.title("COVID-19 Daily Deaths Reported in Different Countries",{'fontsize':24})

# Axis Lable
plt.xlabel("Date",fontsize =18)
plt.ylabel("Number of Daily Deaths Reported (Thousand)",fontsize =18)

# plt.yscale("log")
plt.legend(fontsize=18) 
plt.savefig("daily deaths reported countrywise.png")
plt.show()

COVID19 - INDIA DATA ANALYSIS

In [125]:
india_data_json = requests.get('https://api.rootnet.in/covid19-in/unofficial/covid19india.org/statewise').json()
In [126]:
df_india = pd.io.json.json_normalize(india_data_json['data']['statewise'])
df_india = df_india.set_index("state")
In [127]:
df_india = df_india.sort_values('confirmed',ascending=False)
df_india = df_india[['confirmed','active','recovered','deaths']]
df_india
Out[127]:
confirmed active recovered deaths
state
Maharashtra 1078 930 79 69
Tamil Nadu 690 664 19 7
Delhi 576 547 20 9
Telangana 404 348 45 11
Rajasthan 363 336 25 2
Kerala 336 263 71 2
Uttar Pradesh 332 308 21 3
Andhra Pradesh 329 320 6 3
Madhya Pradesh 290 248 21 21
Karnataka 181 148 28 5
Gujarat 179 138 25 16
Haryana 155 122 31 2
Jammu and Kashmir 139 130 6 3
Punjab 101 79 14 8
West Bengal 99 81 13 5
Odisha 42 39 2 1
Bihar 38 22 15 1
Uttarakhand 32 28 4 0
Assam 28 28 0 0
Himachal Pradesh 27 24 1 2
Chandigarh 18 11 7 0
Ladakh 14 4 10 0
Andaman and Nicobar Islands 11 11 0 0
Chhattisgarh 10 1 9 0
Goa 7 7 0 0
Puducherry 5 4 1 0
Jharkhand 4 4 0 0
Manipur 2 1 1 0
Dadra and Nagar Haveli 1 1 0 0
Tripura 1 1 0 0
Mizoram 1 1 0 0
Arunachal Pradesh 1 1 0 0
Daman and Diu 0 0 0 0
Lakshadweep 0 0 0 0
Meghalaya 0 0 0 0
Nagaland 0 0 0 0
Sikkim 0 0 0 0
In [128]:
total = df_india.sum()
total.name = "Total"

df_india_total = pd.DataFrame(total).transpose().style.background_gradient(cmap='prism',axis=1)
df_india_total
Out[128]:
confirmed active recovered deaths
Total 5494 4850 474 170
In [129]:
df_india_total.to_excel('India_COVID19_totalData.xlsx',engine='openpyxl')
In [130]:
df_india['Mortality_Rate(per100)'] = np.round(100 * df_india['deaths']/df_india['confirmed'],2)

df_india.sort_values('confirmed',ascending=False,inplace=True)

df_india
Out[130]:
confirmed active recovered deaths Mortality_Rate(per100)
state
Maharashtra 1078 930 79 69 6.40
Tamil Nadu 690 664 19 7 1.01
Delhi 576 547 20 9 1.56
Telangana 404 348 45 11 2.72
Rajasthan 363 336 25 2 0.55
Kerala 336 263 71 2 0.60
Uttar Pradesh 332 308 21 3 0.90
Andhra Pradesh 329 320 6 3 0.91
Madhya Pradesh 290 248 21 21 7.24
Karnataka 181 148 28 5 2.76
Gujarat 179 138 25 16 8.94
Haryana 155 122 31 2 1.29
Jammu and Kashmir 139 130 6 3 2.16
Punjab 101 79 14 8 7.92
West Bengal 99 81 13 5 5.05
Odisha 42 39 2 1 2.38
Bihar 38 22 15 1 2.63
Uttarakhand 32 28 4 0 0.00
Assam 28 28 0 0 0.00
Himachal Pradesh 27 24 1 2 7.41
Chandigarh 18 11 7 0 0.00
Ladakh 14 4 10 0 0.00
Andaman and Nicobar Islands 11 11 0 0 0.00
Chhattisgarh 10 1 9 0 0.00
Goa 7 7 0 0 0.00
Puducherry 5 4 1 0 0.00
Jharkhand 4 4 0 0 0.00
Manipur 2 1 1 0 0.00
Mizoram 1 1 0 0 0.00
Arunachal Pradesh 1 1 0 0 0.00
Dadra and Nagar Haveli 1 1 0 0 0.00
Tripura 1 1 0 0 0.00
Daman and Diu 0 0 0 0 NaN
Lakshadweep 0 0 0 0 NaN
Meghalaya 0 0 0 0 NaN
Nagaland 0 0 0 0 NaN
Sikkim 0 0 0 0 NaN
In [131]:
df_india_styled = df_india.style.background_gradient('prism')
df_india_styled

df_india_styled = df_india.style.background_gradient(cmap='Blues',subset=["confirmed"])\
                        .background_gradient(cmap='Reds',subset=["deaths"])\
                        .background_gradient(cmap='Greens',subset=["recovered"])\
                        .background_gradient(cmap='Purples',subset=["active"])\
                        .background_gradient(cmap='YlOrBr',subset=["Mortality_Rate(per100)"])
In [132]:
df_india_styled
/home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/matplotlib/colors.py:512: RuntimeWarning:

invalid value encountered in less

Out[132]:
confirmed active recovered deaths Mortality_Rate(per100)
state
Maharashtra 1078 930 79 69 6.4
Tamil Nadu 690 664 19 7 1.01
Delhi 576 547 20 9 1.56
Telangana 404 348 45 11 2.72
Rajasthan 363 336 25 2 0.55
Kerala 336 263 71 2 0.6
Uttar Pradesh 332 308 21 3 0.9
Andhra Pradesh 329 320 6 3 0.91
Madhya Pradesh 290 248 21 21 7.24
Karnataka 181 148 28 5 2.76
Gujarat 179 138 25 16 8.94
Haryana 155 122 31 2 1.29
Jammu and Kashmir 139 130 6 3 2.16
Punjab 101 79 14 8 7.92
West Bengal 99 81 13 5 5.05
Odisha 42 39 2 1 2.38
Bihar 38 22 15 1 2.63
Uttarakhand 32 28 4 0 0
Assam 28 28 0 0 0
Himachal Pradesh 27 24 1 2 7.41
Chandigarh 18 11 7 0 0
Ladakh 14 4 10 0 0
Andaman and Nicobar Islands 11 11 0 0 0
Chhattisgarh 10 1 9 0 0
Goa 7 7 0 0 0
Puducherry 5 4 1 0 0
Jharkhand 4 4 0 0 0
Manipur 2 1 1 0 0
Mizoram 1 1 0 0 0
Arunachal Pradesh 1 1 0 0 0
Dadra and Nagar Haveli 1 1 0 0 0
Tripura 1 1 0 0 0
Daman and Diu 0 0 0 0 nan
Lakshadweep 0 0 0 0 nan
Meghalaya 0 0 0 0 nan
Nagaland 0 0 0 0 nan
Sikkim 0 0 0 0 nan
In [133]:
df_india_styled.to_excel('India_COVID19_data.xlsx',engine='openpyxl')
/home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/matplotlib/colors.py:512: RuntimeWarning:

invalid value encountered in less

In [134]:
# Adding Location data (Latitude,Longitude)
locations = {
    "Kerala" : [10.8505,76.2711],
    "Maharashtra" : [19.7515,75.7139],
    "Karnataka": [15.3173,75.7139],
    "Telangana": [18.1124,79.0193],
    "Uttar Pradesh": [26.8467,80.9462],
    "Rajasthan": [27.0238,74.2179],
    "Gujarat":[22.2587,71.1924],
    "Delhi" : [28.7041,77.1025],
    "Punjab":[31.1471,75.3412],
    "Tamil Nadu": [11.1271,78.6569],
    "Haryana": [29.0588,76.0856],
    "Madhya Pradesh":[22.9734,78.6569],
    "Jammu and Kashmir":[33.7782,76.5762],
    "Ladakh": [34.1526,77.5770],
    "Andhra Pradesh":[15.9129,79.7400],
    "West Bengal": [22.9868,87.8550],
    "Bihar": [25.0961,85.3131],
    "Chhattisgarh":[21.2787,81.8661],
    "Chandigarh":[30.7333,76.7794],
    "Uttarakhand":[30.0668,79.0193],
    "Himachal Pradesh":[31.1048,77.1734],
    "Goa": [15.2993,74.1240],
    "Odisha":[20.9517,85.0985],
    "Andaman and Nicobar Islands": [11.7401,92.6586],
    "Puducherry":[11.9416,79.8083],
    "Manipur":[24.6637,93.9063],
    "Mizoram":[23.1645,92.9376],
    "Assam":[26.2006,92.9376],
    "Meghalaya":[25.4670,91.3662],
    "Tripura":[23.9408,91.9882],
    "Arunachal Pradesh":[28.2180,94.7278],
    "Jharkhand" : [23.6102,85.2799],
    "Nagaland": [26.1584,94.5624],
    "Sikkim": [27.5330,88.5122],
    "Dadra and Nagar Haveli":[20.1809,73.0169],
    "Lakshadweep":[10.5667,72.6417],
    "Daman and Diu":[20.4283,72.8397]    
}
df_india["Lat"] = ""
df_india["Long"] = ""
for index in df_india.index :
    df_india.loc[df_india.index == index,"Lat"] = locations[index][0]
    df_india.loc[df_india.index == index,"Long"] = locations[index][1]
In [135]:
url = "https://raw.githubusercontent.com/Subhash9325/GeoJson-Data-of-Indian-States/master/Indian_States"
state_json = requests.get(url).json()
india = folium.Map(location=[23,80], zoom_start=4,max_zoom=6,min_zoom=4,height=500,width="80%")
for i in range(0,len(df_india[df_india['confirmed']>0].index)):
    folium.Circle(
        location=[df_india.iloc[i]['Lat'], df_india.iloc[i]['Long']],
        tooltip = "<h5 style='text-align:center;font-weight: bold'>"+df_india.iloc[i].name+"</h5>"+
                    "<hr style='margin:10px;'>"+
                    "<ul style='color: #444;list-style-type:circle;align-item:left;padding-left:20px;padding-right:20px'>"+
        "<li>Confirmed: "+str(df_india.iloc[i]['confirmed'])+"</li>"+
        "<li>Active:   "+str(df_india.iloc[i]['active'])+"</li>"+
        "<li>Recovered:   "+str(df_india.iloc[i]['recovered'])+"</li>"+
        "<li>Deaths:   "+str(df_india.iloc[i]['deaths'])+"</li>"+
        
        "<li>Mortality Rate:   "+str(np.round(df_india.iloc[i]['deaths']/(df_india.iloc[i]['confirmed']+1)*100,2))+"</li>"+
        "</ul>"
        ,
        radius=(int(np.log2(df_india.iloc[i]['confirmed']+1)))*15000,
        color='#ff6600',
        fill_color='#ff8533',
        fill=True).add_to(india)

india
Out[135]:
In [136]:
f = plt.figure(figsize=(18,9))
f.add_subplot(111)

plt.axes(axisbelow=True)
plt.barh(df_india.sort_values('confirmed')["confirmed"].index[:],df_india.sort_values('confirmed')["confirmed"].values[:],color="mediumblue")
plt.tick_params(size=5,labelsize = 13)
plt.xlabel("Confirmed Cases",fontsize=18)
plt.title("India States (Confirmed Cases)",fontsize=20)
plt.grid(alpha=0.3)
plt.savefig('India States (Confirmed Cases).png')
In [137]:
f = plt.figure(figsize=(18,9))
f.add_subplot(111)

plt.axes(axisbelow=True)
plt.barh(df_india.sort_values('active')["active"].index[:],df_india.sort_values('active')["active"].values[:],color="yellow")
plt.tick_params(size=5,labelsize = 13)
plt.xlabel("Active Cases",fontsize=18)
plt.title("India States (ActiveCases)",fontsize=20)
plt.grid(alpha=0.3)
plt.savefig('India States (Active Cases).png')
In [138]:
f = plt.figure(figsize=(18,9))
f.add_subplot(111)

plt.axes(axisbelow=True)
plt.barh(df_india.sort_values('recovered')["recovered"].index[:],df_india.sort_values('recovered')["recovered"].values[:],color="lawngreen")
plt.tick_params(size=5,labelsize = 13)
plt.xlabel("Recovered Cases",fontsize=18)
plt.title("India States (Recovered Cases)",fontsize=20)
plt.grid(alpha=0.3)
plt.savefig('India States (Recovered Cases).png')
In [139]:
f = plt.figure(figsize=(18,9))
f.add_subplot(111)

plt.axes(axisbelow=True)
plt.barh(df_india.sort_values('deaths')["deaths"].index[:],df_india.sort_values('deaths')["deaths"].values[:],color="red")
plt.tick_params(size=5,labelsize = 13)
plt.xlabel("Deaths",fontsize=18)
plt.title("India States (Deaths)",fontsize=20)
plt.grid(alpha=0.3)
plt.savefig('India States (Deaths).png')

COVID-19 : USA TRENDS AND ANALYSIS

In [140]:
date_usa = datetime.strptime(df_confirmed.columns[-1],'%m/%d/%y').strftime('%m-%d-%Y')
df_temp = pd.read_csv("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/"+date_usa+".csv")
df_usa = df_temp.loc[df_temp['Country_Region'] == 'US']
df_usa = df_usa.rename(columns={'Admin2':'County'})
df_usa = df_usa.rename(columns={'Province_State':'State'})
df_usa
Out[140]:
FIPS County State Country_Region Last_Update Lat Long_ Confirmed Deaths Recovered Active Combined_Key
0 45001.0 Abbeville South Carolina US 2020-04-07 23:04:49 34.223334 -82.461707 5 0 0 0 Abbeville, South Carolina, US
1 22001.0 Acadia Louisiana US 2020-04-07 23:04:49 30.295065 -92.414197 82 2 0 0 Acadia, Louisiana, US
2 51001.0 Accomack Virginia US 2020-04-07 23:04:49 37.767072 -75.632346 11 0 0 0 Accomack, Virginia, US
3 16001.0 Ada Idaho US 2020-04-07 23:04:49 43.452658 -116.241552 419 3 0 0 Ada, Idaho, US
4 19001.0 Adair Iowa US 2020-04-07 23:04:49 41.330756 -94.471059 1 0 0 0 Adair, Iowa, US
5 21001.0 Adair Kentucky US 2020-04-07 23:04:49 37.104598 -85.281297 3 0 0 0 Adair, Kentucky, US
6 29001.0 Adair Missouri US 2020-04-07 23:04:49 40.190586 -92.600782 11 0 0 0 Adair, Missouri, US
7 40001.0 Adair Oklahoma US 2020-04-07 23:04:49 35.884942 -94.658593 25 1 0 0 Adair, Oklahoma, US
8 8001.0 Adams Colorado US 2020-04-07 23:04:49 39.874321 -104.336258 388 12 0 0 Adams, Colorado, US
9 16003.0 Adams Idaho US 2020-04-07 23:04:49 44.893336 -116.454525 1 0 0 0 Adams, Idaho, US
10 17001.0 Adams Illinois US 2020-04-07 23:04:49 39.988156 -91.187868 8 0 0 0 Adams, Illinois, US
11 18001.0 Adams Indiana US 2020-04-07 23:04:49 40.745765 -84.936714 2 0 0 0 Adams, Indiana, US
12 28001.0 Adams Mississippi US 2020-04-07 23:04:49 31.476698 -91.353260 24 0 0 0 Adams, Mississippi, US
13 31001.0 Adams Nebraska US 2020-04-07 23:04:49 40.524494 -98.501178 15 0 0 0 Adams, Nebraska, US
14 39001.0 Adams Ohio US 2020-04-07 23:04:49 38.845411 -83.471896 2 0 0 0 Adams,Ohio,US
15 42001.0 Adams Pennsylvania US 2020-04-07 23:04:49 39.871404 -77.216103 28 0 0 0 Adams, Pennsylvania, US
16 53001.0 Adams Washington US 2020-04-07 23:04:49 46.982998 -118.560173 27 0 0 0 Adams, Washington, US
17 55001.0 Adams Wisconsin US 2020-04-07 23:04:49 43.969747 -89.767828 2 0 0 0 Adams, Wisconsin, US
18 50001.0 Addison Vermont US 2020-04-07 23:04:49 44.032173 -73.141309 47 0 0 0 Addison, Vermont, US
19 45003.0 Aiken South Carolina US 2020-04-07 23:04:49 33.543380 -81.636454 28 1 0 0 Aiken, South Carolina, US
20 12001.0 Alachua Florida US 2020-04-07 23:04:49 29.678665 -82.359282 139 0 0 0 Alachua, Florida, US
21 37001.0 Alamance North Carolina US 2020-04-07 23:04:49 36.043470 -79.399761 24 0 0 0 Alamance, North Carolina, US
22 6001.0 Alameda California US 2020-04-07 23:04:49 37.646294 -121.892927 634 15 0 0 Alameda, California, US
23 8003.0 Alamosa Colorado US 2020-04-07 23:04:49 37.572506 -105.788545 4 0 0 0 Alamosa, Colorado, US
24 36001.0 Albany New York US 2020-04-07 23:04:49 42.600603 -73.977239 333 8 0 0 Albany, New York, US
25 56001.0 Albany Wyoming US 2020-04-07 23:04:49 41.654987 -105.723541 4 0 0 0 Albany, Wyoming, US
26 51003.0 Albemarle Virginia US 2020-04-07 23:04:49 38.020807 -78.554811 38 0 0 0 Albemarle, Virginia, US
27 28003.0 Alcorn Mississippi US 2020-04-07 23:04:49 34.880845 -88.579962 6 0 0 0 Alcorn, Mississippi, US
28 37003.0 Alexander North Carolina US 2020-04-07 23:04:49 35.922380 -81.177519 2 0 0 0 Alexander, North Carolina, US
29 51510.0 Alexandria Virginia US 2020-04-07 23:04:49 38.814003 -77.081831 126 0 0 0 Alexandria, Virginia, US
... ... ... ... ... ... ... ... ... ... ... ... ...
2565 37197.0 Yadkin North Carolina US 2020-04-07 23:04:49 36.162226 -80.662394 9 0 0 0 Yadkin, North Carolina, US
2566 53077.0 Yakima Washington US 2020-04-07 23:04:49 46.457385 -120.738013 380 12 0 0 Yakima, Washington, US
2567 28161.0 Yalobusha Mississippi US 2020-04-07 23:04:49 34.028242 -89.707620 11 0 0 0 Yalobusha, Mississippi, US
2568 41071.0 Yamhill Oregon US 2020-04-07 23:04:49 45.233304 -123.308696 25 3 0 0 Yamhill, Oregon, US
2569 46135.0 Yankton South Dakota US 2020-04-07 23:04:49 43.009245 -97.394676 15 0 0 0 Yankton, South Dakota, US
2570 36123.0 Yates New York US 2020-04-07 23:04:49 42.635055 -77.103699 1 0 0 0 Yates, New York, US
2571 4025.0 Yavapai Arizona US 2020-04-07 23:04:49 34.599339 -112.553859 57 0 0 0 Yavapai, Arizona, US
2572 28163.0 Yazoo Mississippi US 2020-04-07 23:04:49 32.778904 -90.396561 32 1 0 0 Yazoo, Mississippi, US
2573 5149.0 Yell Arkansas US 2020-04-07 23:04:49 35.002924 -93.411713 1 0 0 0 Yell, Arkansas, US
2574 27173.0 Yellow Medicine Minnesota US 2020-04-07 23:04:49 44.717102 -95.868425 1 0 0 0 Yellow Medicine, Minnesota, US
2575 30111.0 Yellowstone Montana US 2020-04-07 23:04:49 45.939559 -108.269149 47 0 0 0 Yellowstone, Montana, US
2576 6113.0 Yolo California US 2020-04-07 23:04:49 38.682789 -121.901829 50 1 0 0 Yolo, California, US
2577 23031.0 York Maine US 2020-04-07 23:04:49 43.478084 -70.710524 115 2 0 0 York, Maine, US
2578 31185.0 York Nebraska US 2020-04-07 23:04:49 40.872726 -97.597102 1 0 0 0 York, Nebraska, US
2579 42133.0 York Pennsylvania US 2020-04-07 23:04:49 39.921009 -76.730401 218 2 0 0 York, Pennsylvania, US
2580 45091.0 York South Carolina US 2020-04-07 23:04:49 34.972815 -81.180859 108 2 0 0 York, South Carolina, US
2581 51199.0 York Virginia US 2020-04-07 23:04:49 37.243748 -76.544128 25 1 0 0 York, Virginia, US
2582 48503.0 Young Texas US 2020-04-07 23:04:49 33.176597 -98.687909 3 0 0 0 Young, Texas, US
2583 6115.0 Yuba California US 2020-04-07 23:04:49 39.262559 -121.353564 11 0 0 0 Yuba, California, US
2584 2290.0 Yukon-Koyukuk Alaska US 2020-04-07 23:04:49 65.508155 -151.390739 1 0 0 0 Yukon-Koyukuk, Alaska, US
2585 4027.0 Yuma Arizona US 2020-04-07 23:04:49 32.768957 -113.906667 13 0 0 0 Yuma, Arizona, US
2586 8125.0 Yuma Colorado US 2020-04-07 23:04:49 40.003468 -102.425867 2 0 0 0 Yuma, Colorado, US
2587 48505.0 Zapata Texas US 2020-04-07 23:04:49 27.001564 -99.169872 1 0 0 0 Zapata, Texas, US
2603 88888.0 NaN Diamond Princess US 2020-04-07 23:04:49 NaN NaN 49 0 0 0 Diamond Princess, US
2612 99999.0 NaN Grand Princess US 2020-04-07 23:04:49 NaN NaN 103 3 0 0 Grand Princess, US
2615 66.0 NaN Guam US 2020-04-07 23:04:49 13.444300 144.793700 121 4 0 0 Guam, US
2642 NaN NaN Northern Mariana Islands US 2020-04-07 23:04:49 15.097900 145.673900 6 1 0 0 ,Northern Mariana Islands,US
2648 72.0 NaN Puerto Rico US 2020-04-07 23:04:49 18.220800 -66.590100 573 23 0 0 Puerto Rico, US
2653 NaN NaN Recovered US 2020-04-07 23:04:49 NaN NaN 0 0 21763 0 Recovered, US
2671 78.0 NaN Virgin Islands US 2020-04-07 23:04:49 18.335800 -64.896300 43 1 0 0 ,Virgin Islands,US

2595 rows × 12 columns

In [141]:
total = df_usa.sum()
total.name = "Total"
pd.DataFrame(total).transpose().loc[:,['Confirmed','Deaths']].style.background_gradient(cmap='prism',axis=1)
Out[141]:
Confirmed Deaths
Total 396223 12722
In [142]:
df_usa_styled = df_usa.loc[:,['Confirmed','Deaths','State']].groupby(['State']).sum().sort_values('Confirmed',ascending=False).style.background_gradient(cmap='Blues',subset=['Confirmed']).background_gradient(cmap='Reds',subset=['Deaths'])
df_usa_styled.to_excel('USA_COVID19_DATA.xlsx',engine='openpyxl')
df_usa_styled
Out[142]:
Confirmed Deaths
State
New York 139875 5489
New Jersey 44416 1232
Michigan 18970 845
California 17351 432
Louisiana 16284 582
Massachusetts 15202 356
Pennsylvania 14853 247
Florida 14545 283
Illinois 12271 308
Texas 8925 160
Georgia 8822 329
Washington 8692 400
Connecticut 7781 277
Indiana 5510 173
Colorado 5429 179
Ohio 4782 167
Maryland 4371 103
Tennessee 4139 72
Virginia 3335 66
North Carolina 3299 53
Missouri 3130 65
Arizona 2870 73
Wisconsin 2578 92
South Carolina 2417 51
Alabama 2169 64
Nevada 2124 72
Mississippi 1915 60
Utah 1746 13
Oklahoma 1474 67
Rhode Island 1229 30
District of Columbia 1211 22
Idaho 1170 13
Kentucky 1149 65
Oregon 1132 29
Minnesota 1069 34
Iowa 1046 25
Arkansas 946 18
Delaware 928 16
Kansas 912 29
New Mexico 876 13
New Hampshire 715 9
Vermont 575 23
Puerto Rico 573 23
Maine 519 12
Nebraska 447 10
West Virginia 412 4
Hawaii 410 5
South Dakota 320 6
Montana 319 6
North Dakota 237 4
Wyoming 216 0
Alaska 213 6
Guam 121 4
Grand Princess 103 3
Diamond Princess 49 0
Virgin Islands 43 1
Northern Mariana Islands 8 2
Recovered 0 0
In [ ]:
 
In [144]:
f = plt.figure(figsize=(10,5))
f.add_subplot(111)

plt.axes(axisbelow=True)
plt.barh(df_usa.groupby(["State"]).sum().sort_values('Confirmed')["Confirmed"].index[-10:],df_usa.groupby(["State"]).sum().sort_values('Confirmed')["Confirmed"].values[-10:],color="darkcyan")
plt.tick_params(size=5,labelsize = 13)
plt.xlabel("Confirmed Cases",fontsize=18)
plt.title("Top 10 States: USA (Confirmed Cases)",fontsize=20)
plt.grid(alpha=0.3)
plt.savefig('Top 10 States_USA (Confirmed Cases).png')
In [145]:
f = plt.figure(figsize=(10,5))
f.add_subplot(111)

plt.axes(axisbelow=True)
plt.barh(df_usa.groupby(["State"]).sum().sort_values('Deaths')["Deaths"].index[-10:],df_usa.groupby(["State"]).sum().sort_values('Deaths')["Deaths"].values[-10:],color="crimson")
plt.tick_params(size=5,labelsize = 13)
plt.xlabel("Deaths",fontsize=18)
plt.title("Top 10 States: USA (Deaths Cases)",fontsize=20)
plt.grid(alpha=0.3)
plt.savefig('Top 10 States_USA (Deaths Cases).png')
In [146]:
# url = "https://raw.githubusercontent.com/Subhash9325/GeoJson-Data-of-Indian-States/master/Indian_States"
# state_json = requests.get(url).json()
df_usa = df_usa.replace(np.nan, 0, regex=True)
usa = folium.Map(location=[37, -102], zoom_start=4,max_zoom=8,min_zoom=4)
for i in np.int32(np.asarray(df_usa[df_usa['Confirmed'] > 0].index)):
    folium.Circle(
        location=[df_usa.loc[i]['Lat'], df_usa.loc[i]['Long_']],
        tooltip = "<h5 style='text-align:center;font-weight: bold'>"+df_usa.loc[i]['State']+"</h5>"+
                    "<div style='text-align:center;'>"+str(np.nan_to_num(df_usa.loc[i]['County']))+"</div>"+
                    "<hr style='margin:10px;'>"+
                    "<ul style='color: #444;list-style-type:circle;align-item:left;padding-left:20px;padding-right:20px'>"+
        "<li>Confirmed: "+str(df_usa.loc[i]['Confirmed'])+"</li>"+
        "<li>Active:   "+str(df_usa.loc[i]['Active'])+"</li>"+
        "<li>Recovered:   "+str(df_usa.loc[i]['Recovered'])+"</li>"+       
        "<li>Deaths:   "+str(df_usa.loc[i]['Deaths'])+"</li>"+
        "<li>Mortality Rate:   "+str(np.round(df_usa.loc[i]['Deaths']/(df_usa.loc[i]['Confirmed']+1)*100,2))+"</li>"+
        "</ul>"
        ,
        radius=int((np.log2(df_usa.loc[i]['Confirmed']+1))*6000),
        color='#ff6600',
        fill_color='#ff8533',
        fill=True).add_to(usa)

usa
Out[146]:
In [147]:
!tar cvfz allfiles.tar.gz *
COIVD-19-World.png
COVID-19 Exploratory Data Analysis.ipynb
daily confirmed cases countrywise.png
daily confirmed cases global.png
daily deaths cases Global.png
daily deaths reported countrywise.png
India_COVID19_data.xlsx
India_COVID19_totalData.xlsx
India States (Active Cases).png
India States (Confirmed Cases).png
India States (Deaths).png
India States (Recovered Cases).png
model_confirmed.h5
model_deaths.h5
Prediction Curve-Confirmed.png
spread.png
Top 10 Countries (Confirmed Cases).png
Top 10 Countries (Deaths Cases).png
Top 10 countries latest trends.png
Top 10 Countries (Recovered Cases).png
Top 10 States_USA (Confirmed Cases).png
Top 10 States_USA (Deaths Cases).png
USA_COVID19_DATA.xlsx
World_COVID19_data.xlsx
World_Total_COVID19_data.xlsx
In [ ]: